* This Stata dofile is written to accompany the papers:
* A Leigh & P van der Eng, 'Inequality in Indonesia: What Can We Learn from Top Incomes?' (2009), Journal of Public Economics, 93(1-2): 209-212
* A Leigh & P van der Eng, 'Top Incomes in Indonesia 1920-2004' in A.B. Atkinson and T. Piketty (eds) (2009) Top Incomes Over the Twentieth Century: Volume II - A Global Perspective, Oxford, Oxford University Press
* Feel free to use or adapt it, but please cite those papers.
* Questions to andrew_leigh@ksg02.harvard.edu

* This program calculates top income shares among all adults aged 15+, dividing household income equally among adults.
* NB. In Bahasa: 
* Age=umur
* Total income=jumlah pendapatan (root word for income is dapat or pendapat) 
* Total cash income=upah berupa uang
* Total non-cash income=upah berupa barang/jasa
* Other relevant words: small agriculture=pangan, farmers=perkebunan, others=lainnya
* No of persons in household=jumlah anggota r.tangga (or banyaknya anggota rt)
 
* The Susenas files used in this dofile are from two sources - the Indonesia project at ANU (contact: Terry Hull) 
* and the Australian Social Science Data Archive
* Where ASSDA files are used, the study numbers are listed below.
* We are not permitted to provide these data to others. Interested users should make their own arrangements to 
* obtain the Susenas files.

* ASSDA No 774: Susenas 1982
* ASSDA No 1071: Susenas 1995
* ASSDA No 1072: Susenas 1996
* ASSDA No 1073: Susenas 1998
* ASSDA No 1074: Susenas 1999
* ASSDA No 1065: Susenas 2000
* ASSDA No 1066: Susenas 2001
* ASSDA No 1067: Susenas 2002
* ASSDA No 1075: Susenas 2003
* English versions of most Susenas codebooks are at http://www.rand.org/labor/bps.data/webdocs/susenas/susenas_main.htm
* Expenditure: pengeluaran or konsumsi or pemakaian
* GE(0) is the mean logarithmic deviation, GE(1) is the Theil index.

* Session setup.
* Run the rest of the file under Stata 10 syntax rules.
version 10
* Empty the data in memory before changing the memory allocation below.
clear
* Do not pause output at --more-- prompts.
set more off
set mem 350m
* Remove any programs already in memory so that -shares- can be defined afresh below.
program drop _all
* Start in the project directory (the same directory -shares- saves its output to).
cd "C:\Users\Andrew\My publications\Indonesian Top Incomes\"

program define shares
  * shares: top-share and inequality statistics for the dataset in memory.
  *
  * Expects, before the call:
  *   income, wage, expend : numeric variables (callers in this file set a
  *                          placeholder of 1 when a measure is not available)
  *   weight               : analytic weight
  *   global y             : survey-year label; becomes the variable year and
  *                          the suffix of the output file shares_$y
  * Effect: the data in memory are replaced by a one-observation dataset holding
  *   cutoffs, top shares, Gini/GE/percentile-ratio measures, sample sizes, means,
  *   medians and totals, which is saved as shares_$y (replacing any existing file)
  *   in the project directory. The working directory is changed to that directory.
  * Requires the user-written command -ineqdeco-.
  *
  * Notes on the implementation:
  *   - Income and wage are multiplied by 12 unless $y is 1996.
  *   - Share numerators and denominators are weighted sums (weight*value), so that
  *     they are consistent with the weighted percentile cutoffs and weighted Gini.
  *   - Cutoffs, totals, shares and year are created as doubles so that no precision
  *     is lost before the >= comparison against the cutoffs or in fractional year
  *     labels such as 2002.1.
  *
  * Legacy per-adult equal-split code, retained for reference (not executed):
  *bysort hhid: egen temp=max(income)
  *bysort hhid: egen hhsize=count(age)
  *egen mean_inc_household=mean(income)
  *egen med_inc_household=median(income)
  *replace income=temp/hhsize
  *drop if income==.
  *drop temp
  *sum hhsize
  *recode income 0=.
  tempvar wtd_income wtd_wage
  * Zero wages are treated as "no wage" and excluded from all wage statistics.
  recode wage 0=.
  foreach v in income wage {
    replace `v'=`v'*12 if $y~=1996
  }

  * Top income shares
  _pctile income [aw=weight], percentiles(90 95 99 99.5 99.9 99.95 99.99)
  return list
  gen double cutoff10=r(r1)
  gen double cutoff5=r(r2)
  gen double cutoff1=r(r3)
  gen double cutoff05=r(r4)
  gen double cutoff01=r(r5)
  gen double cutoff005=r(r6)
  gen double cutoff001=r(r7)
  * Weighted income mass; missing income or weight contributes nothing to the sums.
  gen double `wtd_income'=weight*income
  egen double total_income=sum(`wtd_income')
  foreach p in 10 5 1 05 01 005 001 {
    * temp_a holds the weighted income mass at or above the cutoff (only on those rows);
    * temp_b spreads that value to every row so the share can be formed everywhere.
    egen double temp_a=sum(`wtd_income') if income>=cutoff`p'
    egen double temp_b=max(temp_a)
    gen double share`p'=(temp_b)/(total_income)
    drop temp*
  }
  ineqdeco income [aw=weight]
  gen gini=r(gini)
  sum share* gini
  * Dropped so that collapse can recreate total_income below.
  drop total_income

  * Top wage shares
  sum wage [aw=weight],d
  _pctile wage [aw=weight], percentiles(90 95 99 99.5 99.9 99.95 99.99)
  return list
  gen double cutoffw10=r(r1)
  gen double cutoffw5=r(r2)
  gen double cutoffw1=r(r3)
  gen double cutoffw05=r(r4)
  gen double cutoffw01=r(r5)
  gen double cutoffw005=r(r6)
  gen double cutoffw001=r(r7)
  gen double `wtd_wage'=weight*wage
  egen double total_wage=sum(`wtd_wage')
  foreach p in 10 5 1 05 01 005 001 {
    egen double temp_a=sum(`wtd_wage') if wage>=cutoffw`p'
    egen double temp_b=max(temp_a)
    gen double sharew`p'=(temp_b)/(total_wage)
    drop temp*
  }

  * Summary inequality measures for expenditure (suffix e) and wages (suffix w).
  * _5010 is stored as the inverse of ineqdeco's p10/p50 ratio.
  ineqdeco expend [aw=weight]
  gen ginie=r(gini)
  gen _5010e=1/r(p10p50)
  gen _9010e=r(p90p10)
  gen _9050e=r(p90p50)
  gen ge0e=r(ge0)
  gen ge1e=r(ge1)
  ineqdeco wage [aw=weight]
  gen giniw=r(gini)
  gen _5010w=1/r(p10p50)
  gen _9010w=r(p90p10)
  gen _9050w=r(p90p50)
  gen ge0w=r(ge0)
  gen ge1w=r(ge1)
  sum sharew* giniw _5010* _9010* _9050* ge*
  corr wage expend [aw=weight]
  gen corr_wage_expend=r(rho)

  * Reduce to a single row: the constant statistics above are carried through as
  * (weighted) means; counts, means, medians and sums are taken of income and wage.
  collapse share* cutoff* gini* _5010* _9010* _9050* ge* corr_wage_expend ///
    (count) samplesize=income (mean) mean_inc_household=income ///
    (median) med_inc_household=income (sum) total_income=income ///
    (count) samplesizew=wage (mean) mean_wage_household=wage ///
    (median) med_wage_household=wage (sum) total_wage=wage [aw=weight]
  foreach v of varlist cutoff* samplesize* total_* {
    recast double `v'
  }
  gen double year=$y
  sort year
  cd "C:\Users\Andrew\My publications\Indonesian Top Incomes\"
  save shares_$y, replace
end

* Susenas 1980: household wage total from b4r22; income is a placeholder (set to 1).
global y=1980
cd "C:\Users\Andrew\Datasets\Susenas\"
use tipe daerah batch b1r1 b1r2 b1r7 b1r9 b1r11 b4r22 infl expend using susenas1980_t21.dta, clear
egen hhid=group(tipe daerah batch b1r1 b1r2 b1r7 b1r9)
* Check: list the derived household size (rows per hhid with non-missing infl)
* next to the household-size variable b1r11 for the first 50 rows.
bysort hhid: egen temp=count(infl)
list temp b1r11 in 1/50
drop temp
* 9999999 is treated as a missing-value code for b4r22 before summing within household.
recode b4r22 9999999=.
bysort hhid: egen wage=sum(b4r22)
generate income=1
generate weight=infl
* Keep one row per household (the original note states that expenditure does not vary within households).
egen tag=tag(hhid)
keep if tag
drop tag
* Diagnostic only: distribution of the wage-to-expenditure ratio.
generate wage_expend_ratio=wage/expend
summarize wage_expend_ratio, detail
shares

* ASSDA No 774: Susenas 1982
* Rows with tk=="G" mark household breaks; individual rows (TK=D) are in order within households.
global y=1982
use tk pd03 pf25c pf46 id11 using "C:\Users\Andrew\Datasets\Susenas\d0774.dta", clear
generate n=_n
tsset n
* Seed the household id on the "G" rows, then copy it back onto the preceding
* rows by looking 1, 2, ..., 30 rows ahead for the nearest non-missing id.
generate hhid=n if tk=="G"
forvalues lead=1/30 {
  replace hhid=f`lead'.hhid if hhid==.
}
* Renumber the ids consecutively.
tempvar seq
egen `seq'=group(hhid)
replace hhid=`seq'
drop `seq'
* Check used previously to confirm derived hhsize equals id11 (not executed):
* bysort hhid: egen hhsize=count(pd03), sum hhsize id11 if hhsize~=id11
rename pd03 age
*keep if age>=15 & age~=99
rename pf25c wage
rename pf46 income
* 9999 is treated as missing; negative and zero incomes are set to 0.
recode income 9999=. min/0=0
* Scale by 1000 and replace each person's value with the household total.
foreach v in wage income {
  tempvar hhsum
  replace `v'=`v'*1000
  bysort hhid: egen `hhsum'=sum(`v')
  replace `v'=`hhsum'
  drop `hhsum'
}
* Keep one row per household.
tempvar first
egen `first'=tag(hhid)
keep if `first'==1
drop `first'
* Placeholders: no expenditure variable is loaded and all rows get equal weight.
generate expend=1
generate weight=1
shares

* Susenas 1987
* Builds household income from the four "Income Module Part 4-X" files (X=1..4).
* Each -for num- statement below loads one part (X is substituted into the file name),
* renames its income variable to incomeX, sorts by the household keys and saves it as
* temp1987_X in the Susenas directory. Parts 1-3 first keep only the rows whose item
* code (b4ak01/b4bk01/b4ck01) equals 99, 99 and 9 respectively
* (presumably the total rows of each block - TODO confirm against the codebook).
cd "C:\Users\Andrew\Datasets\Susenas\"
#delimit ;
for num 1: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 4-X Data - SUSENAS 1987.dta", clear \ 
keep if b4ak01==99 \ ren income incomeX \ sort tipe_dok b1r01 b1r02 b1r05 b1r08 b1r09 b1r10 \ save temp1987_X, replace;
for num 2: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 4-X Data - SUSENAS 1987.dta", clear \ 
keep if b4bk01==99 \ ren income incomeX \ sort tipe_dok b1r01 b1r02 b1r05 b1r08 b1r09 b1r10 \ save temp1987_X, replace;
for num 3: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 4-X Data - SUSENAS 1987.dta", clear \ 
keep if b4ck01==9 \ ren income incomeX \ sort tipe_dok b1r01 b1r02 b1r05 b1r08 b1r09 b1r10 \ save temp1987_X, replace;
for num 4: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 4-X Data - SUSENAS 1987.dta", clear \ 
ren income incomeX \ sort tipe_dok b1r01 b1r02 b1r05 b1r08 b1r09 b1r10 \ save temp1987_X, replace;
#delimit cr
* Start from part 1 and merge parts 2-4 onto it by the household keys, tabulating each merge result.
use temp1987_1, clear
for num 2 3 4: sort tipe_dok b1r01 b1r02 b1r05 b1r08 b1r09 b1r10 \ merge tipe_dok b1r01 b1r02 b1r05 b1r08 b1r09 b1r10 using temp1987_X \ tab _merge \ drop _merge
* Missing components count as zero so that the sum below is defined for every row.
for var b4ak07 b4bk10 b4ck11 b4d11 b4d09: recode X .=0 \ sum X
* Income = the three block totals plus b4d11, minus b4d09; wage = the part-1 total only.
gen income=b4ak07+b4bk10+b4ck11+b4d11-b4d09
gen wage=b4ak07
global y=1987
gen weight=inf_rmt
* NOTE(review): expend is not created in this section but -shares- uses it;
* presumably it is already present in the loaded files - confirm.
shares

* Susenas 1990
* Same construction as for 1987, using the 1990 "Income Module Part 4-X" files:
* each part is loaded, (for parts 1-3) restricted to rows with item code 99/99/9,
* its income variable renamed to incomeX, sorted by the household keys and saved as temp1990_X.
cd "C:\Users\Andrew\Datasets\Susenas\"
#delimit ;
for num 1: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 4-X Data - SUSENAS 1990.dta", clear \ 
keep if v5ak01==99 \ ren income incomeX \ sort prop kabu urru nks nurt jart \ save temp1990_X, replace;
for num 2: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 4-X Data - SUSENAS 1990.dta", clear \ 
keep if v5bk01==99 \ ren income incomeX \ sort prop kabu urru nks nurt jart \ save temp1990_X, replace;
for num 3: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 4-X Data - SUSENAS 1990.dta", clear \ 
keep if v5ck01==9 \ ren income incomeX \ sort prop kabu urru nks nurt jart \ save temp1990_X, replace;
for num 4: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 4-X Data - SUSENAS 1990.dta", clear \ 
ren income incomeX \ sort prop kabu urru nks nurt jart \ save temp1990_X, replace;
#delimit cr
* Merge parts 2-4 onto part 1 by the household keys, tabulating each merge result.
use temp1990_1, clear
for num 2 3 4: sort prop kabu urru nks nurt jart \ merge prop kabu urru nks nurt jart using temp1990_X \ tab _merge \ drop _merge
* Missing components count as zero.
for var v5ak07 v5bk10 v5ck11 v5d99 v5d09: recode X .=0 \ sum X
* Income = the three block totals plus v5d99, minus v5d09; wage = the part-1 total only.
gen income=v5ak07+v5bk10+v5ck11+v5d99-v5d09
gen wage=v5ak07
* Diagnostic only: compare the derived income with income4 (the income variable from the part-4 file).
reg income income4
global y=1990
gen weight=inf_rt
* NOTE(review): expend is not created in this section but -shares- uses it;
* presumably it is already present in the loaded files - confirm.
shares

* Susenas 1993
* Builds household income from the four "Income Module Part 5-X" files (X=1..4).
* Parts 1-3: keep the rows with item code 99/99/9, rename income to incomeX, sort by the
* household keys and save as temp1993_X.
* Part 4: within each household, imputedrent is the b5dk3 value on the row with b5dk1==90
* and blockdincome is the b5dk3 value on the row with b5dk1==990 (both set to 0 when absent);
* blockdincomenr is their difference, and only the b5dk1==990 row is kept.
* (The variable names suggest the block total net of imputed rent - confirm against the codebook.)
cd "C:\Users\Andrew\Datasets\Susenas\"
#delimit ;
for num 1: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 5-X Data - SUSENAS 1993.dta", clear \ 
keep if b5ak1==99 \ ren income incomeX \ sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r9 b1r10 cd \ save temp1993_X, replace;
for num 2: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 5-X Data - SUSENAS 1993.dta", clear \ 
keep if b5bk1==99 \ ren income incomeX \ sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r9 b1r10 cd \ save temp1993_X, replace;
for num 3: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 5-X Data - SUSENAS 1993.dta", clear \ 
keep if b5ck1==9 \ ren income incomeX \ sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r9 b1r10 cd \ save temp1993_X, replace;
for num 4: use "C:\Users\Andrew\Datasets\Susenas\Income Module Part 5-X Data - SUSENAS 1993.dta", clear \ 
gen temp=b5dk3 if b5dk1==90 \ bysort b1r1 b1r2 b1r3 b1r4 b1r5 b1r9 b1r10 cd: egen imputedrent=max(temp) \ recode imputedrent .=0 \ drop temp \
gen temp=b5dk3 if b5dk1==990 \ bysort b1r1 b1r2 b1r3 b1r4 b1r5 b1r9 b1r10 cd: egen blockdincome=max(temp) \ recode blockdincome .=0 \ drop temp \
gen blockdincomenr=blockdincome-imputedrent \ keep if b5dk1==990 \
ren income incomeX \ sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r9 b1r10 cd \ save temp1993_X, replace;
#delimit cr
* Merge parts 2-4 onto part 1 by the household keys, tabulating each merge result.
use temp1993_1, clear
for num 2 3 4: sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r9 b1r10 cd \ merge b1r1 b1r2 b1r3 b1r4 b1r5 b1r9 b1r10 cd using temp1993_X \ tab _merge \ drop _merge
* Missing components count as zero.
for var b5ak7 b5bk10 b5ck11 blockdincomenr: recode X .=0 \ sum X
* Income = the three block totals plus blockdincomenr; wage = the part-1 total only.
gen income=b5ak7+b5bk10+b5ck11+blockdincomenr
gen wage=b5ak7
global y=1993
gen weight=inf_rt
* NOTE(review): expend is not created in this section but -shares- uses it;
* presumably it is already present in the loaded files - confirm.
shares

* ASSDA No 1071: Susenas 1995
* Income doesn't seem to be in the individuals, households, education1, education2, or housing file

* ASSDA No 1072: Susenas 1996
* With y set to 1996, -shares- leaves income and wage as they are (no multiplication by 12).
global y=1996
use "C:\Users\Andrew\Datasets\Susenas\income module part 4-1 data - susenas 1996.dta", clear
egen hhid=group(b1r1 b1r2 b1r3 b1r4 b1r5 b1r9 b1r10)
generate weight=inf_rt
generate wage=b4r1
* Overwrites the income variable already in the file with the sum of the four components.
replace income=(b4r1+b4r2+b4r3+b4r4b)
shares

* ASSDA No 1073: Susenas 1998
* The core file has one record per individual, so wages are summed within households.
global y=1998
local hhkeys k1r1 k1r2 k1r3 k1r4 k1r5 k1r9 k1r10
use k1r* k2r2 usia k6r26 k6r27* weind983 using "C:\Users\Andrew\Datasets\Susenas\d1073coreindividuals.dta", clear
* Bring in expenditure (k9r28) from the household file.
sort `hhkeys'
merge `hhkeys' using "C:\Users\Andrew\Datasets\Susenas\d1073corehouseholds1.dta", keep(k9r28)
rename k9r28 expend
egen hhid=group(`hhkeys')
foreach v of varlist k1r* k6r26 weind983 {
  destring `v', force replace
}
* Check used previously to confirm derived hhsize equals k2r2 (not executed):
* bysort hhid: egen hhsize=count(usia). sum hhsize k2r2 if hhsize~=k2r2
rename usia age
*keep if age>=15 & age~=. & k6r26==4
* Cash wage is k6r27a and in-kind wage is k6r27b; 99999999 is treated as missing.
recode k6r27a k6r27b (99999999=.)
generate wage=k6r27a+k6r27b
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
* Unlike the other years, rows are not dropped here: the household wage is kept on one
* row per household and set to missing on the others, so the data stay at person level.
* NOTE(review): confirm that this difference from 1999 onwards is intended.
egen `first'=tag(hhid) if wage~=.
replace wage=. if `first'~=1
drop `first'
rename weind983 weight
* Placeholder: no income variable is available in this file.
generate income=1
shares

* Susenas 1999
* Income file is not decomposed by income type, so only wages from the core file are used.
global y=1999
local hhkeys k1r1 k1r2 k1r3 k1r4 k1r5 k1r9 k1r10
use k1r* k6r27* weind* using "C:\Users\Andrew\Datasets\Susenas\d1074coreindividual.dta", clear
* Bring in expenditure (k9r28) from the household file.
sort `hhkeys'
merge `hhkeys' using "C:\Users\Andrew\Datasets\Susenas\d1074corehousehold.dta", keep(k9r28)
rename k9r28 expend
egen hhid=group(`hhkeys')
* Cash wage is k6r27a and in-kind wage is k6r27b; 99999999 is treated as missing.
recode k6r27a k6r27b (99999999=.)
generate wage=k6r27a+k6r27b
* Replace each person's wage with the household total, then keep one row per household.
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
egen `first'=tag(hhid) if wage~=.
keep if `first'==1
drop `first'
rename weind99 weight
* Placeholder: no income variable is available in this file.
generate income=1
shares

* ASSDA No 1065: Susenas 2000
global y=2000
local hhkeys k1r1 k1r2 k1r3 k1r4 k1r5 k1r8 k1r9
use k1r* usia k6r26 k6r27* weind00 using "C:\Users\Andrew\Datasets\Susenas\d1065coreindividualsmain.dta", clear
* Bring in expenditure (k9r28) from the household file.
sort `hhkeys'
merge `hhkeys' using "C:\Users\Andrew\Datasets\Susenas\d1065corehouseholds.dta", keep(k9r28)
rename k9r28 expend
egen hhid=group(`hhkeys')
foreach v of varlist k1r* k6r26 weind00 {
  destring `v', force replace
}
rename usia age
*keep if age>=15 & age~=. & k6r26==4
* Cash wage is k6r27a and in-kind wage is k6r27b; 99999999 is treated as missing.
recode k6r27a k6r27b (99999999=.)
generate wage=k6r27a+k6r27b
* Replace each person's wage with the household total, then keep one row per household.
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
egen `first'=tag(hhid) if wage~=.
keep if `first'==1
drop `first'
rename weind00 weight
* Placeholder: no income variable is available in this file.
generate income=1
shares

* ASSDA No 1066: Susenas 2001
* A net income variable exists (k5r15, with 99999999 as its missing code), but it does not
* correspond to earlier or later Susenases, so only wages are used.
global y=2001
local hhkeys k1r1 k1r2 k1r3 k1r4 k1r5 k1r7 k1r8
use k1r* k2r2 usia k5r15 k5r16* weind01 using "C:\Users\Andrew\Datasets\Susenas\d1066coreindividuals.dta", clear
* Bring in expenditure (k7r28) from the household file and report the merge result.
sort `hhkeys'
merge `hhkeys' using "C:\Users\Andrew\Datasets\Susenas\d1066corehouseholds.dta", keep(k7r28)
tabulate _merge
drop _merge
rename k7r28 expend
egen hhid=group(`hhkeys')
* Check: summarise the rows where the derived household size differs from k2r2.
bysort hhid: egen hhsize=count(usia)
summarize hhsize k2r2 if hhsize~=k2r2
drop hhsize
rename usia age
* Wages are monthly. Cash wage is k5r16a and in-kind wage is k5r16b.
* A missing component counts as zero; 99999999 is treated as missing.
recode k5r16a k5r16b (.=0) (99999999=.)
generate wage=k5r16a+k5r16b 
* Replace each person's wage with the household total, then keep one row per household.
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
egen `first'=tag(hhid) if wage~=.
keep if `first'==1
drop `first'
rename weind01 weight
* Placeholder: income is not taken from this file.
generate income=1
shares

* Susenas 2002 (income module) <<Not used>>
* Saved under the label 2002.2; the combining step later drops that year.
global y=2002.2
local hhkeys b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8
use b1r* b5* wert using "C:\Users\Andrew\Datasets\Susenas\income module data - susenas 2002.dta", clear
foreach v in b1r1 b1r2 b1r3 b1r4 b1r7 b1r8 {
  destring `v', force replace
}
* Bring in expenditure (b7r28) from the core household file and report the merge result.
sort `hhkeys'
merge `hhkeys' using "C:\Users\Andrew\Datasets\Susenas\core module household data - susenas 2002.dta", keep(b7r28) 
tabulate _merge
drop _merge
egen hhid=group(`hhkeys')
* For agricultural-business and business income, a negative amount is flagged by "-" in
* the companion variable whose name is the amount variable's name plus "s".
foreach v of varlist b5br6k6 b5cr6k6 b5hrjk4 {
  replace `v'=-`v' if `v's=="-"
}
* Missing components count as zero; show the detailed distribution of each.
foreach v of varlist b5arjk7 b5br6k6 b5cr6k6 b5dr04k3 b5drjk3 {
  recode `v' .=0
  summarize `v', detail
}
* Components are put on a common footing by dividing by 12 or 4 before they are combined.
generate income=b5arjk7+(b5br6k6/12)+(b5cr6k6/4)+(b5drjk3/12)-(b5dr04k3/12)
generate expend=b7r28
rename b5arjk7 wage
rename wert weight
shares

* 2002 (core - wages only)
* Chika uses b7r28. Daniel uses b43r24k4. If b5 is the only option, he suggests using b5hrjk2 and not b5hrjk4.
* Saved under the label 2002.1; the combining step later recodes that year to 2002.
global y=2002.1
local hhkeys b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8
use "C:\Users\Andrew\Datasets\Susenas\core module individual data - susenas 2002.dta", clear
* Bring in expenditure (b7r28) from the household file and report the merge result.
sort `hhkeys'
merge `hhkeys' using "C:\Users\Andrew\Datasets\Susenas\core module household data - susenas 2002.dta", keep(b7r28) 
tabulate _merge
drop _merge
egen hhid=group(`hhkeys')
summarize hhid
* Individual wage is b5r30; a missing wage counts as zero.
generate wage=b5r30
recode wage .=0
* Replace each person's wage with the household total, then keep one row per household.
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
egen `first'=tag(hhid) if wage~=.
keep if `first'==1
drop `first'
generate expend=b7r28
rename weind02 weight
* Placeholder: income is not taken from the core file.
generate income=1
shares

* 2003
* Expenditure = block 7B, q28, col 2 (b7r28)
global y=2003
local hhkeys b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8
use "C:\Users\Andrew\Datasets\Susenas\core module individual data - susenas 2003.dta", clear
* Bring in expenditure from the household file and report the merge result.
sort `hhkeys'
merge `hhkeys' using "C:\Users\Andrew\Datasets\Susenas\core_module_household_data_susenas_2003", keep(b7r28)
tabulate _merge
drop _merge
egen hhid=group(`hhkeys')
summarize hhid
* Individual wage is b5r31; a missing wage counts as zero.
generate wage=b5r31
recode wage .=0
* Replace each person's wage with the household total, then keep one row per household.
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
egen `first'=tag(hhid) if wage~=.
keep if `first'==1
drop `first'
generate expend=b7r28
rename weind03 weight
* Placeholder: income is not taken from the core file.
generate income=1
shares

* 2004
* Expenditure = block 7B, q28, col 2
global y=2004
local hhkeys b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8
use b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8 b5r34 weind04 using "C:\Users\Andrew\Datasets\Susenas\core module individual data - susenas 2004.dta", clear
* Bring in expenditure (b7r28) from the household file and report the merge result.
sort `hhkeys'
merge `hhkeys' using "C:\Users\Andrew\Datasets\Susenas\core module household data - susenas 2004", keep(b7r28)
tabulate _merge
drop _merge
egen hhid=group(`hhkeys')
summarize hhid
* Individual wage is b5r34; a missing wage counts as zero.
generate wage=b5r34
recode wage .=0
* Replace each person's wage with the household total, then keep one row per household.
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
egen `first'=tag(hhid) if wage~=.
keep if `first'==1
drop `first'
generate expend=b7r28
rename weind04 weight
* Placeholder: income is not taken from the core file.
generate income=1
shares

* 2005 (income module)
* Builds household income from the four income-module files (parts 5-1 a, b, c, and d thru g).
* Part a: b51ak7 is replaced by its total within the household keys before saving.
* Parts b, c and d-g: saved unchanged apart from sorting.
* Each part is sorted by the household keys and saved as temp2005_X (X=1..4) in the Susenas directory.
cd "C:\Users\Andrew\Datasets\Susenas\"
#delimit ;
for num 1: use "C:\Users\Andrew\Datasets\Susenas\income module part 5-1 a data - susenas 2005.dta", clear \
bysort b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8: egen temp=sum(b51ak7) \ replace b51ak7=temp \ drop temp \
sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8 \ save temp2005_X, replace;
for num 2: use "C:\Users\Andrew\Datasets\Susenas\income module part 5-1 b data - susenas 2005.dta", clear \
sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8 \ save temp2005_X, replace;
for num 3: use "C:\Users\Andrew\Datasets\Susenas\income module part 5-1 c data - susenas 2005.dta", clear \
sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8 \ save temp2005_X, replace;
for num 4: use "C:\Users\Andrew\Datasets\Susenas\income module part 5-1 d thru g data - susenas 2005.dta", clear \
sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8 \ save temp2005_X, replace;
#delimit cr
* Merge parts 2-4 onto part 1 by the household keys, tabulating each merge result.
use temp2005_1, clear
for num 2 3 4: sort b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8 \ merge b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8 using temp2005_X \ tab _merge \ drop _merge
egen hhid=group(b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8)
sum hhid
* Missing components count as zero.
for var b51ak7 b5k6b6 b5k6c6 b5k2d7 b5k2d4: recode X .=0
* Components are put on a common footing by dividing by 12 or 4 before they are combined;
* b5k2d4 is subtracted.
gen income=(b51ak7)+(b5k6b6/12)+(b5k6c6/4)+(b5k2d7/12)-(b5k2d4/12)
ren b51ak7 wage
* Household summation of income and wage, disabled (not executed):
*for any income wage: bysort hhid: egen temp=sum(X) \ replace X=temp \ drop temp
* Keep one row per household.
egen temp=tag(hhid) if income~=.
keep if temp==1
drop temp
* Placeholder: no expenditure variable is merged in for this module.
gen expend=1
ren wert weight
* Saved under the label 2005.2.
global y=2005.2
shares

* 2005 (core - first version)
* Saved under the label 2005.1.
global y=2005.1
use b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8 b5dr33 weind using "C:\Users\Andrew\Datasets\Susenas\core module individual data - susenas 2005.dta", clear
egen hhid=group(b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8)
summarize hhid
* Individual wage is b5dr33; a missing wage counts as zero.
generate wage=b5dr33
recode wage .=0
* Replace each person's wage with the household total, then keep one row per household.
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
egen `first'=tag(hhid) if wage~=.
keep if `first'==1
drop `first'
rename weind weight
* Placeholders: neither expenditure nor income is loaded for this year.
generate expend=1
generate income=1
shares

* 2005 (core - revised)
* NOTE(review): this uses the same label (2005.1) as the first-version run, so the file
* saved by -shares- here replaces the one written by that run - confirm this is intended.
global y=2005.1
use b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8 b5dr33 weind using "C:\Users\Andrew\Datasets\Susenas\susenas2005_core_revised.dta", clear
egen hhid=group(b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8)
summarize hhid
* Individual wage is b5dr33; a missing wage counts as zero.
generate wage=b5dr33
recode wage .=0
* Replace each person's wage with the household total, then keep one row per household.
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
egen `first'=tag(hhid) if wage~=.
keep if `first'==1
drop `first'
rename weind weight
* Placeholders: neither expenditure nor income is loaded for this year.
generate expend=1
generate income=1
shares

* Combining the share estimates 1982-2004
* Start from the 1982 file and merge in one saved file per survey year.
cd "C:\Users\Andrew\My publications\Indonesian Top Incomes\"
use shares_1982, clear
foreach yr of numlist 1987 1990 1993 1996 1998/2001 2002.1 2002.2 2003/2004 {
  sort year
  merge year using shares_`yr'
  drop _merge
}
*gen str15 income_source="All income"
*replace income_source="Wages only" if year==1998 | year==1999 | year==2000 | year==2002
*replace income_source="Wages only" if year==1998 | year==2000
* Express the shares in percent.
foreach v of varlist share* {
  replace `v'=`v'*100
}
*order year income_source
order year
format mean_wage_household %16.0f
* Drop the 2002 income-module row (label 2002.2; matched by range rather than by equality)
* and relabel the 2002 core row (2002.1) as 2002.
drop if year>=2002.199 & year<=2002.201
recode year 2002.1=2002 
* Expenditure is a placeholder in the 1982 data, so blank its expenditure-based measures.
foreach v of varlist ginie _9050e _5010e _9010e ge0e ge1e {
  replace `v'=. if year==1982
}
sort year
save shares_1982_2004, replace
* Appendix Tables A10-A12: wage and expenditure Gini / GE(0) / GE(1) by year, shown in the data browser.
use shares_1982_2004, clear
format gini* ge* %9.2f
keep year giniw ge0w ge1w ginie ge0e ge1e 
order year giniw ge0w ge1w ginie ge0e ge1e 
browse

* Reload and fill in the years that have no survey (the result is not saved).
use shares_1982_2004, clear
tsset year
tsfill
sort year

* For cutoffs (Table A13): express the cutoffs in millions.
use shares_1982_2004, clear
foreach v of varlist cutoff* {
  replace `v'=`v'/1000000
  format `v' %9.1f
}

* US$ millionaires in 2003 Susenas
* In 2003, US$1=8592 Rupiah
* So millionaire cutoff is R 8,592,000,000
* The household wage data are rebuilt exactly as in the 2003 section, without calling -shares-.
global y=2003
local hhkeys b1r1 b1r2 b1r3 b1r4 b1r5 b1r7 b1r8
use "C:\Users\Andrew\Datasets\Susenas\core module individual data - susenas 2003.dta", clear
sort `hhkeys'
merge `hhkeys' using "C:\Users\Andrew\Datasets\Susenas\core_module_household_data_susenas_2003", keep(b7r28)
tabulate _merge
drop _merge
egen hhid=group(`hhkeys')
summarize hhid
generate wage=b5r31
recode wage .=0
* Replace each person's wage with the household total, then keep one row per household.
tempvar hhsum first
bysort hhid: egen `hhsum'=sum(wage)
replace wage=`hhsum'
drop `hhsum'
egen `first'=tag(hhid) if wage~=.
keep if `first'==1
drop `first'
generate expend=b7r28
rename weind03 weight
generate income=1
* Same preparation as at the top of -shares-: zero wages become missing, then multiply by 12.
recode wage 0=.
foreach v in income wage {
  replace `v'=`v'*12 if $y~=1996
}
* Flag households whose wage is at or above the cutoff (1), below it (0), or missing (.).
generate million=wage
recode million .=. 8592000000/max=1 *=0
tabulate million
